import matplotlib.pyplot as plt
import pandas as pd
# Load the Netflix titles dataset (path is relative to the working directory).
df = pd.read_csv('./data/netflix_titles.csv')

# Derive the release decade from 'release_year' (e.g. 1994 -> 1990).
# Pandas raises KeyError here if the CSV has no 'release_year' column; in that
# case 'Decade' has to be supplied some other way before the later analysis.
df['Decade'] = (df['release_year'] // 10) * 10

# Print the column index to confirm that 'Decade' was added.
print(df.columns)
# Output of print(df.columns):
# Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
#        'release_year', 'rating', 'duration', 'listed_in', 'description',
#        'Decade'],
#       dtype='object')
# Reference used: https://github.com/alicelh/class-constrained-t-SNE
import numpy as np
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder
import plotly.express as px
# Prepare data for t-SNE: replace each categorical column with integer codes.
# NOTE: this overwrites the original values in df, so the plot below colours
# and labels points by the encoded integers rather than the original labels.
# The fitted encoders are kept in label_encoders so the codes can be mapped
# back with inverse_transform.
label_encoders = {}
for column in ['type', 'rating', 'Decade']:
    le = LabelEncoder()
    # Values are cast to str before encoding, so every column is treated as
    # a set of string categories.
    df[column] = le.fit_transform(df[column].astype(str))
    label_encoders[column] = le

# Select the encoded columns as the t-SNE input features.
features = df[['type', 'rating', 'Decade']]

# Project the features to two dimensions; random_state is fixed at 42.
tsne = TSNE(n_components=2, random_state=42)
tsne_results = tsne.fit_transform(features)

# Store the two embedding coordinates on the DataFrame for plotting.
df['tsne-2d-one'] = tsne_results[:, 0]
df['tsne-2d-two'] = tsne_results[:, 1]

# Scatter plot of the embedding with Plotly Express, coloured by 'Decade'.
fig_tsne = px.scatter(
    df,
    x='tsne-2d-one',
    y='tsne-2d-two',
    color='Decade',
    hover_data=['type', 'rating'],
    title='Netflix Content t-SNE Visualization',
)
fig_tsne.show()